;;########################################################################
;; regress.lsp
;; XLisp-Stat Regression object used by
;; OLS, Robust & Monotonic Regression ViSta model object
;; Copyright (c) 1995-1996 by Carla M. Bann
;; Copyright (c) 1997-2001 by Carla M. Bann and Forrest W. Young
;;########################################################################

#|**************************************************************************
Define Multiple Optimal Regression Model Prototype
**************************************************************************|#

; (1) defproto

(defproto MORALS-PROTO
  '(iterations
    maximum-RSq
    minimum-RSq-improve
    RSq-list
    XRaw
    YRaw
    RSq  
    Yhat
    plot
    trans-plot-object
    initial-itermat
    count
    beta-list
    standardized-beta-list
    vista-object
    method)
  ()
  regression-model-proto
  "Multiple Optimal Regression Model")


; (2) - slot-accessor methods


(defmeth MORALS-PROTO :iterations (&optional (number nil set)) 
  (when set (setf (slot-value 'iterations) number)) 
  (slot-value 'iterations))


(defmeth MORALS-PROTO :count (&optional (number nil set)) 
  (when set (setf (slot-value 'count) number)) 
  (slot-value 'count))

(defmeth MORALS-PROTO :maximum-RSq (&optional (number nil set)) 
  (when set (setf (slot-value 'maximum-RSq) number)) 
  (slot-value 'maximum-RSq))


(defmeth MORALS-PROTO :minimum-RSq-improve (&optional (number nil set)) 
  (when set (setf (slot-value 'minimum-RSq-improve) number)) 
  (slot-value 'minimum-RSq-improve))


(defmeth MORALS-PROTO :RSq-list (&optional (list1 nil set)) 
  (when set (setf (slot-value 'RSq-list) list1)) 
  (slot-value 'RSq-list))

(defmeth MORALS-PROTO :RSq (&optional (number nil set)) 
  (when set (setf (slot-value 'RSq) number)) 
  (slot-value 'RSq))


(defmeth MORALS-PROTO :YRaw (&optional (list1 nil set)) 
  (when set (setf (slot-value 'YRaw) list1)) 
  (slot-value 'YRaw))


(defmeth MORALS-PROTO :XRaw (&optional (list1 nil set)) 
  (when set (setf (slot-value 'XRaw) list1)) 
  (slot-value 'XRaw))


(defmeth MORALS-PROTO :YHat (&optional (list1 nil set)) 
  (when set (setf (slot-value 'YHat) list1)) 
  (slot-value 'YHat))
 
(defmeth MORALS-PROTO :plot (&optional (logical nil set)) 
  (when set (setf (slot-value 'plot) logical)) 
  (slot-value 'plot))
;!!!!!
(defmeth MORALS-PROTO :trans-plot-object (&optional (obj-id nil set)) 
  (when set (setf (slot-value 'trans-plot-object) obj-id)) 
  (slot-value 'trans-plot-object))

(defmeth MORALS-PROTO :initial-itermat (&optional (mat nil set)) 
  (when set (setf (slot-value 'initial-itermat) mat)) 
  (slot-value 'initial-itermat))

(defmeth MORALS-PROTO :beta-list (&optional (list1 nil set)) 
  (when set (setf (slot-value 'beta-list) list1)) 
  (slot-value 'beta-list))

(defmeth MORALS-PROTO :standardized-beta-list (&optional (list1 nil set)) 
  (when set (setf (slot-value 'standardized-beta-list) list1)) 
  (slot-value 'standardized-beta-list))


(defmeth MORALS-PROTO :vista-object (&optional (obj-id nil set)) 
  (when set (setf (slot-value 'vista-object) obj-id)) 
  (slot-value 'vista-object))


; (3) - constructor function


(defun optimal-regression-model (x y &key 
                                   (intercept T)
                                   (print T)
                                   (plot T)
                                   weights
                                   (included (repeat t (length y)))
                                   predictor-names
                                   response-name
                                   case-labels                                  
                                   (iterations 20)
                                   (maximum-RSq 1.00)
                                   (minimum-RSq-improve .001)
                                   vista-object
                                   )
"Args: (x y &key (intercept T) (print T) (plot T) weights 
included predictor-names response-name case-labels maximum-rsq minimum-rsq-improve)
X	- list of independent variables or X matrix
Y	- dependent variable.
INTERCEPT - T to include (default), NIL for no intercept PRINT	- if not NIL print summary information
WEIGHTS	- if supplied should be the same length as Y; error variances are
assumed to be inversely proportional to WEIGHTS PREDICTOR-NAMES
RESPONSE-NAME
ITERATIONS
MAXIMUM-RSQ
MINIMUM-RSQ-IMPROVE
CASE-LABELS - sequences of strings or symbols. INCLUDED - if supplied should be the same length as Y, with elements nil 
to skip a in computing estimates (but not in residual analysis).
Returns a regression model object. To examine the model further assign the result to a variable and send it messages." 
  (let ((x (cond
             ((matrixp x) x)
             ((vectorp x) (list x))
             ((and (consp x) (numberp (car x))) (list x)) (t x)))
        (m (send MORALS-PROTO :new x y)))
    (send m :x (if (matrixp x) x (apply #'bind-columns x))) 
    (send m :y y)
    (send m :intercept intercept)
    (send m :weights weights)
    (send m :included included)
    (send m :predictor-names predictor-names) 
    (send m :response-name response-name)
    (send m :case-labels case-labels)
    (send m :iterations iterations)
    (send m :maximum-rsq maximum-rsq)
    (send m :minimum-rsq-improve minimum-rsq-improve) 
    (send m :vista-object vista-object)
    (send m :compute)
    (send m :count 0)
    (send m :iterate-once)
    (setf *Morals-Model* m)
    m))

; :ISNEW method

(defmeth MORALS-PROTO :isnew (x y)
  (call-next-method)
  (send self :Xraw X)
  (send self :Yraw Y)
  self)

; COMPUTE method
; Note that this method does not handle non-normalized data correctly.
; It is replaced by a compute method further in the code that does.
; However, this method iterates, whereas the later method doesn't.


(defmeth MORALS-PROTO :iter-history (w)
  (let* ((vista-obj (send self :vista-object))
         (model (if (equalp (send vista-obj :method) "Robust") 
                    (send vista-obj :robust-model) self))
         (n (length (send model :rsq-list)))
         (i 0)
         (improve nil)
         (minimprove nil)
         (itermat nil)
         (iterlist nil)
         (oldmat nil)
         )
    (if (equalp (send vista-obj :method) "OLS") (setf n 1))
    (loop (if (= i n) (return)) 
          (setf minimprove (select (send model :rsq-list) i)) 
          (when (= i 0)
                (setf improve "initial OLS value"))
          (when (> i 0)
                (setf improve (- (select (send model :rsq-list) i)
                                 (select (send model :rsq-list) (- i 1)))))
          (setf iterlist (list i minimprove improve))
          (when (= i 0) 
                (send model :initial-itermat (bind-rows iterlist)))
          (when (= i 1) 
                (setf itermat (bind-rows iterlist)))
          (when (> i 1)
                (setf itermat (bind-rows itermat iterlist)))
          (setf i (+ 1 i)) 
          )  
itermat
))

;fwy changed (send model :method) to 
;(let ... (method (send model :control-panel-method))
;    and replaced (send model :method) with method 
;    to fix report problems


(defmeth MORALS-PROTO :morals-display (&key (dialog nil))
  (let* ((itermat nil)
         (residmat nil)
         (levmat nil)
         (niter-now)
         (w nil)
         (model (send self :vista-object))
         (method (case (send model :control-panel-method) (0 "OLS") (1 "Monotonic") (2 "Robust")))
         
         (model2 (if (equalp method "Robust") 
                     (send model :robust-model) self))
         (weight-mat nil) (transf-mat nil) (result nil)
         (weights (if (equalp method "Robust")
                      (send (send model :robust-model) :weights))) 
        ; (method (send model :control-panel-method))
         (box nil))
;(one-button-dialog 
;  (format nil "~a" 
;          (list (send model :method) 
;                (send model :control-panel-method) method)))
    (if dialog
        (setf dialog 
              (choose-subset-dialog "Options"
                  '("Display Residuals" "Display Leverages")))
        
        (setf dialog (list nil)))
    (if (not (equalp method "OLS")) (setf dialog (combine dialog 2)))
    (when (not (equalp dialog nil))
          (setf box (if dialog (select dialog 0)))
          (setf w (report-header "Regression"))
          (if (= 1 (length (send self :predictor-names)))
              (display-string (format nil "Simple Regression Analysis~2%") w)
              (display-string 
               (format nil "Multiple Regression Analysis (~a)~2%"
                       method) w))
          (display-string 
           (format nil "Response Variable: ~a ~%" 
                   (first (select (send (send self :vista-object) :variables)
                           (send (send self :vista-object) :dv)))) w)
          (when (equalp method "Robust")
                (display-string 
                 (format nil "Robust Method Option:~%Maximum Iterations = ~d~%" 
                         (send model :iter)) w))
          (when (equalp method "Monotonic")
                (cond
                  (dialog
                   (display-string "Monotonic Iterations Performed when Requested by User" w))
                  (t
                    (display-string 
                     (format nil "Monotonic Method Options:~%Maximum Iterations  = ~d" 
                             (send model :iter)) w)
                    (display-string 
                     (format nil "~%Maximum R-Square    = ~d" 
                             (send model :max-rsq)) w)
                    (display-string 
                     (format nil "~%Minimum RSq Improve = ~d~%" 
                             (send model :min-rsq-improve)) w)))
                
                (setf itermat (send self :iter-history w))
                (setf itermat (bind-columns (col itermat 1) (col itermat 2)))
                (setf niter-now (first (array-dimensions itermat)))
                (cond 
                  ((= niter-now 1) (display-string (format nil "~3%ITERATIVE HISTORY:  NO ITERATIONS REQUESTED.~%NOTE: An iteration is performed when the user clicks the ITER~%button on the Iteration Control panel.  This report is of the~%initialization process, which is by Ordinary Least Squares.~%") w))
                  (t
                   (display-string (format nil "~3%ITERATIVE HISTORY:~%") W)
                   (print-matrix-to-window 
                    itermat w :decimals 5
                    :row-labels (mapcar #'(lambda (i) (format nil "Iter ~d" i)) 
                                        (iseq (first (array-dimensions itermat))))
                    :column-labels '("R-Square" "Improve")
                    :row-heading "Iterations"
                    :column-heading "Iterative Information"
                    ))))
                  
          
          (send self :predictor-names (select (send (send self :vista-object) :variables)
                                              (send (send self :vista-object) :iv)))
          (send (send model :robust-model) :predictor-names 
                (select (send 
                         (send self :vista-object) :variables)
                        (send (send self :vista-object) :iv)))
          (cond
            ((equalp method "Robust") 
             (send (send model :robust-model) :display w))
            ((equalp method "Monotonic")
             (send self :display w))
            ((equalp method "OLS")
             (send (send model :lin-reg-model) :display w)))
          (when (> (length (send model :iv)) 1)
                (display-string (format nil "~%VIF, Square root of VIF, and Multiple R-squared of Predictor Variables~%") w)
                (send model :vif)
                (print-matrix-to-window 
                 (bind-columns (send model :vif-list)
                               (sqrt (send model :vif-list))
                               (- 1 (/ 1 (send model :vif-list))))
                 w
                 :decimals 3
                 :row-labels (select (send model :variables) (send model :iv))
                 :column-labels (list "VIF" "SqrtVIF" "RSquare")
                 :row-heading "Predictors"
                 :column-heading "Measures")
          
          
                (display-string (format nil "~%Autocorrelation = ~,4g ~%" (send model :autocorr)) w))
    
          (when (equalp method "Robust")
                (print-matrix-to-window 
                 (bind-columns weights) w :decimals 3
                 :row-labels (send model :labels)
                 :column-labels (list "Weights")
                 :row-heading "Observations"
                 :column-heading "Robust Weights"))
                
          
          (when (equalp method "Monotonic")
                (setf transf-mat 
                      (bind-columns (send self :yraw) (send self :y)
                                    (send self :yhat)))
                (setf result 
                      (sort-and-permute-dob 
                       transf-mat (send model :labels) (send self :yraw) nil))
                
                (display-string (format nil "~%Monotonic Transformation:~%Ordered Raw Data, Transformed Data and Fitted Values~%~%") w)
                (print-matrix-to-window 
                 (select result 0) w :decimals 3
                 :row-labels (select result 1)
                 :column-labels '("Data" "TransfData" "Y-Hat")
                 :row-heading "Ordered Obs."
                 :column-heading "Transformation Information"))


          ;;new
          (when (member '0 box) (setf residmat (send self :print-residuals))
                (setf residmat (bind-columns residmat (send model :dffits)))
                (display-string (format nil "~2%RESIDUALS~%") w)
                (print-matrix-to-window 
                 residmat w :decimals 3
                 :row-labels  (send model :labels)
                 :column-labels '("Y" "Y-Hat" "Residuals" "Student" "External" "DFFITS")
                 :row-heading "Observations"
                 :column-heading "Residual Information"
                 ))
          
          (when (member '1 box) (setf levmat (send self :print-leverages))
                (display-string
                 (format nil "~2%LEVERAGES:~%") w)
                (print-matrix-to-window 
                 levmat w :decimals 3
                 :row-labels (send model :labels)
                 :column-labels '("Leverage" "CooksDist")
                 :row-heading "Observations"
                 :column-heading "Leverage Information"
                 ))
          
          
        
          (send w :fit-window-to-text)
          w
          )))

(defmeth MORALS-PROTO :print-residuals ()
  (let* ((vista-obj (send self :vista-object))
        ;(model (if (equalp (send vista-obj :method) "Robust") 
        ;           (send vista-obj :robust-model)
        ;           self))
         (model (case (send vista-obj :control-panel-method)
                  (0 (send vista-obj :lin-reg-model))
                  (1 self)
                  (2 (send vista-obj :robust-model))))
         (Y (send model :Y))
         (fit-values (send model :fit-values))
         (residuals (send model :residuals))
         (stud-resid (send model :studentized-residuals))
         (ext-stud-resid (send model :externally-studentized-residuals))
         )
    (bind-columns Y fit-values residuals stud-resid ext-stud-resid)))


(defmeth MORALS-PROTO :print-leverages ()
  (let* ((vista-obj (send self :vista-object))
         ;(model (if (equalp (send vista-obj :method) "Robust") 
         ;           (send vista-obj :robust-model) self))
         (model (case (send vista-obj :control-panel-method)
                  (0 (send vista-obj :lin-reg-model))
                  (1 self)
                  (2 (send vista-obj :robust-model))))
         (Lev (send model :leverages))
         (Cook (send model :cooks-distances))
         )
    (bind-columns lev cook)))


#|**************************************************************************
Define Secondary Least Squares Monotonic Transformation
**************************************************************************|#

(defun lsmt (ordinal interval)
"Args: ORDINAL INTERVAL
Computes the least squares monotonic transformation of ORDINAL given INTERVAL.
ORDINAL  is a vector or list of numbers of an ordinal  variable.
INTERVAL is a vector or list of numbers of an interval variable.
ORDINAL and INTERVAL must have the same number of elements.
Returns a vector or list of the numbers which are (weakly) in the same order
as ORDINAL and which are a least squares fit to INTERVAL. 
The secondary approach to ties is used (ties remain tied).
Written by Forrest W. Young 10/26/94"
(when (/= (length ordinal) (length interval))
        (error "The ordinal and interval variables must be the same length."))
  (let* ((n (length ordinal))
         (rank-order (order ordinal))
         (X (select interval rank-order))
         (Y (select ordinal  rank-order))
         (block-mean nil)
         (block-size nil)
         (tie-mean nil)
         (j nil)
         (lower nil)
         ) 
;force tied data to remain tied
    (when (/= n (length (remove-duplicates ordinal)))
          (dolist (i (iseq 0 (- n 2)))
                  (when (and (= (select Y i) (select Y (1+ i))) 
                             (not lower)) 
                        (setf lower i))
                  (when (and lower 
                             (or (< (select Y i) (select Y (1+ i)))
                                 (= i (- n 2))))
                        (setf tie-mean 0)
                        (when (= i (- n 2)) (setf i (- n 1)))
                        (dolist (j (iseq lower i))
                                (setf tie-mean (+ tie-mean (select X j))))
                        (setf tie-mean (/ tie-mean (- (1+ i) lower)))
                        (dolist (j (iseq lower i))
                                (setf (select X j) tie-mean))
                        (setf lower nil))))
    (setf Y (copy-list X))
;compute the least squares monotonic transformation
    (dolist (i (iseq 1 (1- n)))
            (setf block-size 1)
            (setf block-mean (select x i))
            (loop 
             (setf j (- i block-size))
             (when (or (< j 0) (>= block-mean (select y j))) (return))
             (setf block-size (1+ block-size))
             (setf block-mean  (/ (+ (* (1- block-size) block-mean) 
                                     (select x j)) block-size)))
            (dolist (k (iseq (1+ j) i)) (setf (select Y k) block-mean)))
    (select Y (order rank-order))))

#|**************************************************************************
Define other functions
**************************************************************************|#

(defun variance (variable)
    (^ (standard-deviation variable) 2))

(defun ssq (x)
    (sum (* x x)))


#|**************************************************************************
Re-define MORALS-PROTO compute method to work with overlay button
**************************************************************************|#

(defmeth MORALS-PROTO :iterate-once () 
  (let* ((bl nil)) 
   (cond 
    ((not (send self :yhat)) 
     (send self :Yhat (send self :fit-values))
     (send self :rsq (send self :r-squared))
     (send self :rsq-list (list (send self :rsq)))
     (setf bl (list (remove (select (send self :coef-estimates) 0) 
                                    (send self :coef-estimates))))
     (send self :beta-list bl)
     (send self :standardized-beta-list (list (send self :standardize-betas 
                                              (select bl 0)))))
    (t
     (let* ((YHat   (send self :fit-values))
            (MYHat  (Mean YHat))
            (STYHat (Standard-Deviation YHat))
            (YHNorm (/ (- YHat MYHat) STYHat))
            (YRaw   (send self :YRaw)) 
            (MYRaw  (Mean YRaw))
            (STYRaw (Standard-Deviation YRaw))
            (YU     (lsmt YRaw YHat))
            (MYU    (Mean YU))
            (STYU   (Standard-Deviation YU))
            (YUNorm (/ (- YU MYU) STYU))
            (YU*    (+ MYRaw (* YUNorm STYRaw)))
            (YHat*  (+ MYRaw (* YHNorm STYRaw (/ STYHat STYU)))))
       (send self :Yhat YHat*)
       (send self :Y    YU*)
       (send self :compute) 
       (send self :RSq (send self :r-squared))
       (send self :RSq-list (append (send self :RSq-list) 
                                    (list (send self :RSq))))
       (setf bl (list (remove (select (send self :coef-estimates) 0)
                                      (send self :coef-estimates))))
       (send self :beta-list (append (send self :beta-list) bl))
       (send self :standardized-beta-list 
             (append (send self :standardized-beta-list)
                     (list (send self :standardize-betas (select bl 0)))))
       (send self :count (+ 1 (send self :count))))
       ))))

(defmeth MORALS-PROTO  :standardize-betas (beta-list)
  (let* (
         (standardized-beta-list nil)
         (sd-x nil)
         (sd-y nil)
         (i 0)
         (vista-obj (send self :vista-object))
         (data-mat (send vista-obj :data-matrix))
         (nobs (send vista-obj :nobs))
         (n (length (send vista-obj :iv)))
         )
    (dotimes (i n sd-x)
             (setf sd-x 
                   (append sd-x (list (standard-deviation
                           (select data-mat
                                   (iseq nobs)
                                   (select (send vista-obj :iv) i)))))))
    (setf sd-y (standard-deviation (send self :y)))
    (setf standardized-beta-list 
          (* beta-list (/ sd-x sd-y)))
    ))